********************************************************************************
*Clean raw ACS data 
*5 & 1YR ESTIMATES, COUNTY LEVEL, 2009-2018
********************************************************************************
global acs "\replication\public_data\acs_data"

cd "$acs"
capture mkdir output

********************************************************************************
*0. Clean raw data
*1yr
forvalues i= 2009/2018 {
	use "acs_`i'_1.dta", clear
	rename FIPS fcnty 
	ren A00001_001 tot_pop 
	ren A02001_002 male
	egen elderly = rowtotal(A01001_011 A01001_012 A01001_013)
	ren A03001_002 white
	ren A12002_001 edu_denom
	ren A12002_003 highschool
	ren A12002_005 college
	ren A17005_001 unempl_denom
	ren A17005_003 unempl
	ren B13004_001 pov_denom
	ren B13004_002 poverty
	
	capture ren A20001_001 ins_denom
	capture ren A20001_002 no_ins
	capture ren A20001_004 pub_ins
	capture ren A20001_005 priv_ins
	
	capture ren A20002_007 ins_18_denom
	capture ren A20002_012 ins_25_denom
	capture ren A20002_017 ins_35_denom
	capture ren A20002_011 priv_ins_18
	capture ren A20002_016 priv_ins_25
	capture ren A20002_021 priv_ins_35

keep fcnty tot_pop male elderly white *denom highschool college unempl poverty no_ins pub_ins priv* 	
	gen year = `i'
	tempfile acs_`i'_1clean
	save `acs_`i'_1clean', replace
}

*5yr
forvalues i= 2009/2018 {
	*Named after final year
	use "acs_`i'_5.dta", clear
	rename FIPS fcnty 
	ren A00001_001 tot_pop 
	ren A02001_002 male
	egen elderly = rowtotal(A01001_011 A01001_012 A01001_013)
	ren A03001_002 white
	ren A12002_001 edu_denom
	ren A12002_003 highschool
	ren A12002_005 college
	ren A17005_001 unempl_denom
	ren A17005_003 unempl
	ren B13004_001 pov_denom
	ren B13004_002 poverty
	
	capture ren A20001_001 ins_denom
	capture ren A20001_002 no_ins
	capture ren A20001_004 pub_ins
	capture ren A20001_005 priv_ins
	
	capture ren A20002_007 ins_18_denom
	capture ren A20002_012 ins_25_denom
	capture ren A20002_017 ins_35_denom
	capture ren A20002_011 priv_ins_18
	capture ren A20002_016 priv_ins_25
	capture ren A20002_021 priv_ins_35

	gen year = `i'
	tempfile acs_`i'_5clean
	save `acs_`i'_5clean', replace
}

********************************************************************************
*1. Append years
use `acs_2009_1clean', clear
	forvalues i= 2010/2018 {
		append using `acs_`i'_1clean'
	}
	tab ye
save "acs_1_clean.dta", replace	

use `acs_2009_5clean', clear
	forvalues i= 2010/2018 {
		append using `acs_`i'_5clean'
	}
	tab ye
	keep fcnty year tot_pop male elderly white *denom highschool college unempl poverty no_ins pub_ins priv* 	
	tab ye, sum(no_ins)
	*ins only avail 2012+ in 5yr
save "acs_5_clean.dta", replace	


********************************************************************************
*2. Use 1yr estimates where possible, 5yr midpoints where not 
	use "${acs}\acs_5_clean.dta", clear
	*update year to reflect 5yr midpoint where possible
	replace year = year - 2
	append using `acs_2018_5clean'
	replace ye = 2017 if ye==2018
	append using `acs_2018_5clean'
	keep fcnty year tot_pop male elderly white *denom highschool college unempl poverty no_ins pub_ins priv* 	
	tab ye
	gen source = "5yr"
	
	merge 1:1 fcnty ye using "acs_1_clean.dta"
	keep if _m==1
	drop _m
	append using "acs_1_clean.dta"
	replace source = "1yr" if missing(source)
	tab ye source
	keep if ye > 2008
	qui compress 
	save "output\acs_clean", replace
	
********************************************************************************
*3. Create control vars
	replace male    = male /tot_pop
	replace white   = white /tot_pop
	replace elderly = elderly /tot_pop

	replace highschool = highschool / edu_denom 
	replace college    = college / edu_denom 
	replace unempl     = unempl / unempl_denom 
	replace poverty    = poverty / pov_denom 

	replace no_ins   = no_ins / ins_denom 
	replace pub_ins  = pub_ins / ins_denom 
	replace priv_ins = priv_ins / ins_denom 
	replace priv_ins_18 = priv_ins_18 / ins_18_denom 
	replace priv_ins_25 = priv_ins_25 / ins_25_denom 
	replace priv_ins_35 = priv_ins_35 / ins_35_denom 
	drop *denom 

	label var elderly "65+"
	label var source "ACS 1 or 5yr"		
	destring fcnty, replace
	qui compress
	save "output\acs_clean.dta", replace